from openai import OpenAI


# Client pointed at an OpenAI-compatible endpoint on localhost rather than the
# hosted API.
# NOTE(review): the api_key looks like a placeholder -- presumably the local
# server does not validate it; confirm before pointing this at a real service.
client = OpenAI(api_key='123', base_url='http://localhost:8000/v1')

# Request a streamed completion so the text can be shown as it is generated
# instead of after the whole answer is finished.
response = client.completions.create(
    model="/home/leo/model_file/llama-2-7b.Q8_0.gguf",
    prompt=[
        "介绍一下无锡"  # Chinese: "Give an introduction to Wuxi"
    ],
    stream=True,
)

for chunk in response:
    # A stream chunk may carry an empty `choices` list (e.g. a usage-only
    # chunk); skip it instead of failing with IndexError on `[0]`.
    if not chunk.choices:
        continue
    # flush=True: without a trailing newline stdout keeps the text buffered,
    # so nothing would appear until the end and streaming would be pointless.
    print(chunk.choices[0].text, end='', flush=True)
# Terminate the streamed line.
print("")